In [8]:
import matplotlib.pyplot as plt  
import csv 
import operator 
import datetime as dt  
import pandas as pd
import numpy as np
import openpyxl


# Import
# ======

# essential libraries
import math
import random
from datetime import timedelta

# storing and anaysis
import numpy as np
import pandas as pd

# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import calmap
import folium

# color pallette
cnf, dth, rec, act = '#393e46', '#ff2e63', '#21bf73', '#fe9801' 

# converter
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()   

# hide warnings
import warnings
warnings.filterwarnings('ignore')

df = pd.read_excel('C:\\Users\\TJ\\Desktop\\covid\\covid.xlsx')
In [6]:
pip install folium
Collecting folium
  Downloading folium-0.10.1-py2.py3-none-any.whl (91 kB)
Requirement already satisfied: jinja2>=2.9 in c:\users\tj\anaconda3\lib\site-packages (from folium) (2.11.1)
Requirement already satisfied: numpy in c:\users\tj\anaconda3\lib\site-packages (from folium) (1.18.1)
Collecting branca>=0.3.0
  Downloading branca-0.4.0-py3-none-any.whl (25 kB)
Requirement already satisfied: requests in c:\users\tj\anaconda3\lib\site-packages (from folium) (2.22.0)
Requirement already satisfied: MarkupSafe>=0.23 in c:\users\tj\anaconda3\lib\site-packages (from jinja2>=2.9->folium) (1.1.1)
Requirement already satisfied: six in c:\users\tj\anaconda3\lib\site-packages (from branca>=0.3.0->folium) (1.14.0)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\tj\anaconda3\lib\site-packages (from requests->folium) (2019.11.28)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\users\tj\anaconda3\lib\site-packages (from requests->folium) (3.0.4)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\users\tj\anaconda3\lib\site-packages (from requests->folium) (1.24.3)
Requirement already satisfied: idna<2.9,>=2.5 in c:\users\tj\anaconda3\lib\site-packages (from requests->folium) (2.8)
Installing collected packages: branca, folium
Successfully installed branca-0.4.0 folium-0.10.1
Note: you may need to restart the kernel to use updated packages.
In [2]:
pip install plotly
Collecting plotly
  Downloading plotly-4.6.0-py2.py3-none-any.whl (7.1 MB)
Collecting retrying>=1.3.3
  Downloading retrying-1.3.3.tar.gz (10 kB)
Requirement already satisfied: six in c:\users\tj\anaconda3\lib\site-packages (from plotly) (1.14.0)
Building wheels for collected packages: retrying
  Building wheel for retrying (setup.py): started
  Building wheel for retrying (setup.py): finished with status 'done'
  Created wheel for retrying: filename=retrying-1.3.3-py3-none-any.whl size=11435 sha256=90908f9caf4c582f9072351badfee25ab88f5e892b826171001675fa3a4f3fc1
  Stored in directory: c:\users\tj\appdata\local\pip\cache\wheels\f9\8d\8d\f6af3f7f9eea3553bc2fe6d53e4b287dad18b06a861ac56ddf
Successfully built retrying
Installing collected packages: retrying, plotly
Successfully installed plotly-4.6.0 retrying-1.3.3
Note: you may need to restart the kernel to use updated packages.
In [9]:
df.head()
Out[9]:
iso_code location date total_cases new_cases total_deaths new_deaths total_cases_per_million new_cases_per_million total_deaths_per_million new_deaths_per_million total_tests new_tests total_tests_per_thousand new_tests_per_thousand tests_units
0 ABW Aruba 2020-03-13 2 2 0 0 18.733 18.733 0.0 0.0 NaN NaN NaN NaN NaN
1 ABW Aruba 2020-03-20 4 2 0 0 37.465 18.733 0.0 0.0 NaN NaN NaN NaN NaN
2 ABW Aruba 2020-03-24 12 8 0 0 112.395 74.930 0.0 0.0 NaN NaN NaN NaN NaN
3 ABW Aruba 2020-03-25 17 5 0 0 159.227 46.831 0.0 0.0 NaN NaN NaN NaN NaN
4 ABW Aruba 2020-03-26 19 2 0 0 177.959 18.733 0.0 0.0 NaN NaN NaN NaN NaN
In [10]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12669 entries, 0 to 12668
Data columns (total 16 columns):
iso_code                    12669 non-null object
location                    12669 non-null object
date                        12669 non-null datetime64[ns]
total_cases                 12669 non-null int64
new_cases                   12669 non-null int64
total_deaths                12669 non-null int64
new_deaths                  12669 non-null int64
total_cases_per_million     12462 non-null float64
new_cases_per_million       12462 non-null float64
total_deaths_per_million    12462 non-null float64
new_deaths_per_million      12462 non-null float64
total_tests                 2966 non-null float64
new_tests                   2630 non-null float64
total_tests_per_thousand    2966 non-null float64
new_tests_per_thousand      2634 non-null float64
tests_units                 2966 non-null object
dtypes: datetime64[ns](1), float64(8), int64(4), object(3)
memory usage: 1.5+ MB
In [11]:
a=df['location']
In [12]:
temp = df.groupby('date')['total_cases', 'total_deaths'].sum().reset_index()
In [13]:
temp
Out[13]:
date total_cases total_deaths
0 2019-12-31 27 0
1 2020-01-01 27 0
2 2020-01-02 27 0
3 2020-01-03 44 0
4 2020-01-04 44 0
... ... ... ...
110 2020-04-19 2280936 159504
111 2020-04-20 2355082 164649
112 2020-04-21 2432356 169844
113 2020-04-22 2519743 176779
114 2020-04-23 2587372 182801

115 rows × 3 columns

In [14]:
temp = temp.melt(id_vars="date", value_vars=['total_cases', 'total_deaths'],
                 var_name='Case', value_name='Count')
temp.head()
Out[14]:
date Case Count
0 2019-12-31 total_cases 27
1 2020-01-01 total_cases 27
2 2020-01-02 total_cases 27
3 2020-01-03 total_cases 44
4 2020-01-04 total_cases 44
In [15]:
fig = px.area(temp, x="date", y="Count", color='Case', height=600,
             title='Cases over time', color_discrete_sequence = [rec, dth, act])
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()
In [16]:
# ========

# table
day_wise = df.groupby('date')['total_cases', 'total_deaths','new_cases'].sum().reset_index()

# number cases per 100 cases
day_wise['total_cases / total_deaths'] = round((day_wise['total_cases']/day_wise['total_deaths'])*100, 2)
# # no. of countries
day_wise['No. of countries'] = df[df['total_cases']!=0].groupby('date')['location'].unique().apply(len).values

day_wise['total_deaths / total_cases'] = round((day_wise['total_deaths']/day_wise['total_cases'])*100, 2)



# # fillna by 0
cols = ['total_cases / total_deaths']
day_wise[cols] = day_wise[cols].fillna(0)

day_wise.head(50)
Out[16]:
date total_cases total_deaths new_cases total_cases / total_deaths No. of countries total_deaths / total_cases
0 2019-12-31 27 0 27 inf 1 0.00
1 2020-01-01 27 0 0 inf 1 0.00
2 2020-01-02 27 0 0 inf 1 0.00
3 2020-01-03 44 0 17 inf 1 0.00
4 2020-01-04 44 0 0 inf 1 0.00
5 2020-01-05 59 0 15 inf 1 0.00
6 2020-01-06 59 0 0 inf 1 0.00
7 2020-01-07 59 0 0 inf 1 0.00
8 2020-01-08 59 0 0 inf 1 0.00
9 2020-01-09 59 0 0 inf 1 0.00
10 2020-01-10 59 0 0 inf 1 0.00
11 2020-01-11 59 1 0 5900.00 1 1.69
12 2020-01-12 59 1 0 5900.00 1 1.69
13 2020-01-13 60 1 1 6000.00 2 1.67
14 2020-01-14 60 1 0 6000.00 2 1.67
15 2020-01-15 61 2 1 3050.00 3 3.28
16 2020-01-16 61 2 0 3050.00 3 3.28
17 2020-01-17 66 2 5 3300.00 3 3.03
18 2020-01-18 83 2 17 4150.00 3 2.41
19 2020-01-19 219 3 136 7300.00 3 1.37
20 2020-01-20 239 3 20 7966.67 4 1.26
21 2020-01-21 392 6 153 6533.33 6 1.53
22 2020-01-22 534 17 142 3141.18 6 3.18
23 2020-01-23 631 17 97 3711.76 6 2.69
24 2020-01-24 897 26 266 3450.00 8 2.90
25 2020-01-25 1350 41 453 3292.68 12 3.04
26 2020-01-26 2023 56 673 3612.50 13 2.77
27 2020-01-27 2820 81 797 3481.48 14 2.87
28 2020-01-28 4587 106 1767 4327.36 17 2.31
29 2020-01-29 6067 132 1480 4596.21 17 2.18
30 2020-01-30 7823 170 1756 4601.76 20 2.17
31 2020-01-31 9826 213 2003 4613.15 22 2.17
32 2020-02-01 11946 259 2120 4612.36 25 2.17
33 2020-02-02 14554 305 2608 4771.80 25 2.10
34 2020-02-03 17372 362 2818 4798.90 25 2.08
35 2020-02-04 20615 427 3243 4827.87 26 2.07
36 2020-02-05 24512 493 3897 4972.01 26 2.01
37 2020-02-06 28253 565 3741 5000.53 26 2.00
38 2020-02-07 31430 638 3177 4926.33 26 2.03
39 2020-02-08 34869 724 3439 4816.16 26 2.08
40 2020-02-09 37488 813 2619 4611.07 26 2.17
41 2020-02-10 40470 910 2982 4447.25 26 2.25
42 2020-02-11 42970 1018 2500 4221.02 26 2.37
43 2020-02-12 45003 1115 2033 4036.14 26 2.48
44 2020-02-13 60154 1370 15151 4390.80 26 2.28
45 2020-02-14 64322 1383 4168 4650.90 26 2.15
46 2020-02-15 66882 1527 2560 4379.96 27 2.28
47 2020-02-16 68910 1669 2028 4128.82 27 2.42
48 2020-02-17 70977 1775 2067 3998.70 27 2.50
49 2020-02-18 72873 1873 1896 3890.71 27 2.57
In [17]:
fig_c = px.bar(day_wise, x="date", y="total_cases", color_discrete_sequence = [act])
fig_d = px.bar(day_wise, x="date", y="total_deaths", color_discrete_sequence = [dth])

fig = make_subplots(rows=1, cols=2, shared_xaxes=False, horizontal_spacing=0.1,
                    subplot_titles=('Confirmed cases', 'Deaths reported'))

fig.add_trace(fig_c['data'][0], row=1, col=1)
fig.add_trace(fig_d['data'][0], row=1, col=2)

fig.update_layout(height=480)
fig.show()
In [18]:
fig_1 = px.line(day_wise, x="date", y='total_deaths / total_cases', color_discrete_sequence = [dth])

fig = make_subplots(rows=1, cols=1, shared_xaxes=False, 
                    subplot_titles=('치사율'))

fig.add_trace(fig_1['data'][0], row=1, col=1)

fig.update_layout(height=480)
fig.show()
In [19]:
fig_c = px.bar(day_wise, x="date", y="new_cases", color_discrete_sequence = [act])

fig = make_subplots(rows=1, cols=1, shared_xaxes=False, horizontal_spacing=0.5,
                    subplot_titles=("No. of new cases everyday"))

fig.add_trace(fig_c['data'][0], row=1, col=1)

fig.update_layout(height=480)
fig.show()
In [20]:
# Country wise
# ============

# getting latest values
country_wise = df[df['date']==max(df['date'])].reset_index(drop=True).drop('date', axis=1)

country_wise = country_wise.groupby('location')['total_cases','total_deaths','new_cases'].sum().reset_index()

country_wise.head(100)
Out[20]:
location total_cases total_deaths new_cases
0 Afghanistan 1176 40 84
1 Albania 634 27 25
2 Algeria 2910 402 99
3 Andorra 723 37 6
4 Angola 24 2 0
... ... ... ... ...
95 Italy 187327 25085 3370
96 Jamaica 252 6 19
97 Japan 11772 287 276
98 Jersey 255 18 0
99 Jordan 428 7 3

100 rows × 4 columns

In [21]:
country_wise.shape
Out[21]:
(205, 4)
In [22]:
fig_c = px.bar(country_wise.sort_values('total_cases').tail(30), x="total_cases", y="location", 
               text='total_cases', orientation='h', color_discrete_sequence = [act])
fig_d = px.bar(country_wise.sort_values('total_deaths').tail(30), x="total_deaths", y="location", 
               text='total_deaths', orientation='h', color_discrete_sequence = [dth])

fig = make_subplots(rows=1, cols=2, shared_xaxes=False, horizontal_spacing=0.14, vertical_spacing=0.08,
                    subplot_titles=('total_cases', 'total_deaths'))
fig.add_trace(fig_c['data'][0], row=1, col=1)
fig.add_trace(fig_d['data'][0], row=1, col=2)
In [23]:
fig_c = px.bar(country_wise.sort_values('new_cases').tail(15), x="new_cases", y="location", 
               text='new_cases', orientation='h', color_discrete_sequence = [act])

fig = make_subplots(rows=1, cols=1, shared_xaxes=False, horizontal_spacing=0.14, vertical_spacing=0.08,
                    subplot_titles=('new_cases'))
fig.add_trace(fig_c['data'][0], row=1, col=1)
In [24]:
fig = px.bar(df, x="date", y="total_cases", color='location', height=800,
             title='total_cases', color_discrete_sequence = px.colors.cyclical.mygbm)
fig.show()

# =========================================

fig = px.bar(df, x="date", y="total_deaths", color='location', height=800,
             title='total_deaths', color_discrete_sequence = px.colors.cyclical.mygbm)
fig.show()

# =========================================

fig = px.bar(df, x="date", y="new_cases", color='location', height=800,
             title='new_cases', color_discrete_sequence = px.colors.cyclical.mygbm)
fig.show()
In [ ]: